Packages
`%nin%` = Negate(`%in%`)
if (!require("tidyverse")) install.packages("tidyverse")
library("tidyverse") #tidyr, ggplot, dplyr, etc
if (!require("RColorBrewer")) install.packages("RColorBrewer")
library("RColorBrewer") # plot colors
if (!require("rmdformats")) install.packages("rmdformats")
library("rmdformats") # clean html R markdown formatBackground
This code walks through the data wrangling for the analysis of a study on the rate of change of cutaneous evaporative water loss (CEWL) in response to temperature change in Western Fence Lizards (Sceloporus occidentalis). Importantly, this is where we calculate the average CEWL values per body temperature for live lizards. Data was collected and analyzed by Calvin Davis and Savannah Weaver, under the advising of Dr. Emily Taylor at California Polytechnic State University.
Load Data
Collection Data
Variables:
- date = date of capture
- time_capture = time lizards were captured
- individual ID for each lizard
- dorsal couple = label of which dorsal surface thermocouple was used
- cloacal couple = label of which thermocouple was inserted into cloaca
- dorsal/cloacal lead port = which port thermocouples were plugged into the thermologger
- time in chamber = time lizards were placed in incubation chambers
- time out chamber = time lizards were removed from incubation chamber
- chamber time elapsed min = amount of time lizards spent in incubation chamber in minutes
- chamber time elapsed hr = amount of time lizards spent in incubation chamber in hours
Load in data, format, and calculate additional metrics.
collection_data <- read.csv("./Data/Collection Data.csv",
header = TRUE,
fileEncoding="UTF-8-BOM") %>% #removes junk characters for first column name
mutate(date_time_capture = (as.POSIXct(paste(date, time_capture),
format = "%d/%m/%Y %H:%M")),
rounded_date_time = round.POSIXt(date_time_capture, units = "hours"),
date = as.Date(date, format = "%d/%m/%Y"),
time_capture = as.POSIXct(time_capture, format = "%H:%M"),
individual_ID = as.factor(individual_ID),
treatment = as.factor(treatment),
dorsal_couple = as.factor(dorsal_couple),
dorsal_lead_port = as.factor(dorsal_lead_port),
cloacal_couple = as.factor(cloacal_couple),
cloacal_lead_port = as.factor(cloacal_lead_port),
time_in_chamber = as.POSIXct(time_in_chamber,
format = "%H:%M"),
hold_time_hr = (as.numeric(time_in_chamber - time_capture))/60,
time_out_chamber = as.POSIXct(time_out_chamber,
format = "%H:%M"),
chamber_time_elapsed_min = as.numeric(time_out_chamber - time_in_chamber),
chamber_time_elapsed_hr = chamber_time_elapsed_min/60,
# add estimated "chamber time" for control lizards
chamber_time_elapsed_hr = (case_when(treatment == 'Control' ~ 1,
treatment != 'Control' ~ chamber_time_elapsed_hr)) # used as.factor to check that elapsed time = 1hr for 8336 obs of control lizards
)
summary(collection_data)## date time_capture sock
## Min. :2021-10-05 Min. :2023-10-23 10:10:00.00 Length:39
## 1st Qu.:2021-10-11 1st Qu.:2023-10-23 10:38:00.00 Class :character
## Median :2021-10-12 Median :2023-10-23 11:06:00.00 Mode :character
## Mean :2021-10-13 Mean :2023-10-23 11:08:24.61
## 3rd Qu.:2021-10-18 3rd Qu.:2023-10-23 11:20:30.00
## Max. :2021-10-19 Max. :2023-10-23 16:00:00.00
##
## individual_ID treatment mass_g SVL_mm
## 401 : 1 Control:12 Min. : 8.70 Min. :60.00
## 402 : 1 Cooling:14 1st Qu.: 9.90 1st Qu.:63.00
## 403 : 1 Heating:13 Median :11.20 Median :65.00
## 404 : 1 Mean :11.32 Mean :66.03
## 405 : 1 3rd Qu.:12.50 3rd Qu.:68.50
## 406 : 1 Max. :15.30 Max. :75.00
## (Other):33
## time_in_chamber time_out_chamber dorsal_couple
## Min. :2023-10-23 12:28:00.00 Min. :2023-10-23 13:35:00 A :8
## 1st Qu.:2023-10-23 13:16:30.00 1st Qu.:2023-10-23 14:28:00 D :7
## Median :2023-10-23 14:09:00.00 Median :2023-10-23 15:15:00 B :5
## Mean :2023-10-23 14:14:31.10 Mean :2023-10-23 15:23:00 F :5
## 3rd Qu.:2023-10-23 15:11:30.00 3rd Qu.:2023-10-23 16:27:30 H :5
## Max. :2023-10-23 16:36:00.00 Max. :2023-10-23 17:33:00 E :4
## NA's :12 NA's :12 (Other):5
## dorsal_lead_port cloacal_couple cloacal_lead_port notes
## T2:39 E :10 T1:39 Length:39
## F : 8 Class :character
## B : 5 Mode :character
## C : 5
## D : 5
## A : 3
## (Other): 3
## date_time_capture rounded_date_time hold_time_hr
## Min. :2021-10-05 10:10:00 Min. :2021-10-05 10:00:00.00 Min. :0.600
## 1st Qu.:2021-10-11 10:31:30 1st Qu.:2021-10-11 11:00:00.00 1st Qu.:2.058
## Median :2021-10-12 11:03:00 Median :2021-10-12 11:00:00.00 Median :2.983
## Mean :2021-10-13 11:45:20 Mean :2021-10-13 11:46:09.23 Mean :2.958
## 3rd Qu.:2021-10-18 11:22:00 3rd Qu.:2021-10-18 11:00:00.00 3rd Qu.:3.792
## Max. :2021-10-19 11:58:00 Max. :2021-10-19 12:00:00.00 Max. :5.583
## NA's :12
## chamber_time_elapsed_min chamber_time_elapsed_hr
## Min. :56.00 Min. :0.9333
## 1st Qu.:62.00 1st Qu.:1.0000
## Median :66.00 Median :1.0333
## Mean :68.48 Mean :1.0979
## 3rd Qu.:74.50 3rd Qu.:1.1417
## Max. :92.00 Max. :1.5333
## NA's :12
Collection Stats for Permitting
## # A tibble: 5 × 2
## date n
## <date> <int>
## 1 2021-10-05 8
## 2 2021-10-11 8
## 3 2021-10-12 7
## 4 2021-10-18 8
## 5 2021-10-19 8
Mean Values by Tmt
means <- collection_data %>%
group_by(treatment) %>%
summarise(mean(mass_g),
sd(mass_g),
mean(SVL_mm),
sd(SVL_mm),
mean(chamber_time_elapsed_min))
means## # A tibble: 3 × 6
## treatment `mean(mass_g)` `sd(mass_g)` `mean(SVL_mm)` `sd(SVL_mm)` mean(chamb…¹
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Control 11.4 1.49 66.8 2.77 NA
## 2 Cooling 11.3 1.77 66.1 4.35 72.1
## 3 Heating 11.3 2.06 65.2 3.61 64.5
## # … with abbreviated variable name ¹`mean(chamber_time_elapsed_min)`
## Df Sum Sq Mean Sq F value Pr(>F)
## treatment 2 0.07 0.035 0.011 0.989
## Residuals 36 116.23 3.229
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = mass_g ~ treatment, data = collection_data)
##
## $treatment
## diff lwr upr p adj
## Cooling-Control -0.09761905 -1.825449 1.630211 0.9895441
## Heating-Control -0.08333333 -1.841567 1.674901 0.9926295
## Heating-Cooling 0.01428571 -1.677382 1.705954 0.9997651
CEWL Data
First, need to get the start times for each AquaFlux run:
# load data
AF_starts_wide <- read.csv("./Data/AquaFlux start times.csv",
fileEncoding="UTF-8-BOM")
# reorganize
rownames(AF_starts_wide) <- AF_starts_wide$Probe
AF_starts_wide$Probe <- NULL
AF_starts_tidy <- as.data.frame(t(as.matrix(AF_starts_wide))) %>%
mutate(lizard_ID = as.factor(Comments),
date_time_start = as.POSIXct(paste(date,time_start),
format = "%m/%d/%y %I:%M:%S %p"),
date = as.Date(date, format = "%m/%d/%y")
) %>%
dplyr::select(lizard_ID, date, date_time_start)
summary(AF_starts_tidy)## lizard_ID date date_time_start
## 401 : 1 Min. :2021-10-05 Min. :2021-10-05 13:21:12.00
## 402 : 1 1st Qu.:2021-10-11 1st Qu.:2021-10-11 14:28:24.50
## 403 : 1 Median :2021-10-12 Median :2021-10-12 15:10:09.00
## 404 : 1 Mean :2021-10-13 Mean :2021-10-13 15:59:48.04
## 405 : 1 3rd Qu.:2021-10-18 3rd Qu.:2021-10-18 16:47:25.50
## 406 : 1 Max. :2021-10-19 Max. :2021-10-19 16:30:20.00
## (Other):33
Get filenames:
# make a list of file names of all data to load in
filenames_aquaflux <- list.files(path = "./Data/AquaFlux data",
pattern = ".csv")Make a function that will read in the data from each csv, name and organize the data correctly.
Variables that will be read-in related to CEWL:
- time sec = time recording from Evaporimeter
- CEWL_g_m2h = cutaneous evaporative water loss
- msmt_temp_C = ambient temperature in Celsius
- msmt_RH_percent = % relative ambient humidity
read_CEWL_file <- function(filename) {
# save helpful info
date <- substr(filename, 1, 10)
name <- substr(filename, 19, 21)
## load file
dat <- read.csv(file.path("./Data/AquaFlux data", filename),
header = TRUE,
fileEncoding="UTF-8-BOM") %>%
# select only the relevant values
dplyr::select(time_sec = 'Time..sec.',
CEWL_g_m2h = 'Flux..g..m2h..',
msmt_temp_C = 'AmbT..C.',
msmt_RH_percent = 'AmbRH....'
) %>%
# use filename info
dplyr::mutate(date = date,
lizard_ID = name)
# return the dataframe for that single csv file
dat
}Apply function to all files, compile, and incorporate timing:
# apply function to get data from all csvs
all_CEWL_data <- lapply(filenames_aquaflux, read_CEWL_file) %>%
# paste all data files together into one df by row
reduce(rbind) %>%
# properly format data classes
mutate(lizard_ID = as.factor(lizard_ID),
date = as.Date(date, format = "%d-%m-%Y")
) %>%
# add actual times
left_join(AF_starts_tidy, by = c("date", "lizard_ID")) %>%
# calculate current times for each point
mutate(date_time = date_time_start + floor(time_sec))
summary(all_CEWL_data)## time_sec CEWL_g_m2h msmt_temp_C msmt_RH_percent
## Min. : 0.0 Min. : 0.03 Min. :24.00 Min. :21.10
## 1st Qu.:225.0 1st Qu.: 7.26 1st Qu.:24.80 1st Qu.:32.40
## Median :450.2 Median :12.62 Median :25.30 Median :33.70
## Mean :450.3 Mean :13.42 Mean :25.16 Mean :35.09
## 3rd Qu.:675.7 3rd Qu.:18.44 3rd Qu.:25.50 3rd Qu.:37.40
## Max. :901.0 Max. :37.40 Max. :26.00 Max. :48.20
##
## date lizard_ID date_time_start
## Min. :2021-10-05 417 : 887 Min. :2021-10-05 13:21:12.00
## 1st Qu.:2021-10-11 409 : 886 1st Qu.:2021-10-11 14:11:19.00
## Median :2021-10-12 410 : 886 Median :2021-10-12 15:10:09.00
## Mean :2021-10-13 411 : 886 Mean :2021-10-13 15:59:35.94
## 3rd Qu.:2021-10-18 412 : 886 3rd Qu.:2021-10-18 17:01:43.00
## Max. :2021-10-19 413 : 886 Max. :2021-10-19 16:30:20.00
## (Other):29216
## date_time
## Min. :2021-10-05 13:21:12.00
## 1st Qu.:2021-10-11 14:22:37.00
## Median :2021-10-12 15:17:35.00
## Mean :2021-10-13 16:07:05.77
## 3rd Qu.:2021-10-18 17:05:27.00
## Max. :2021-10-19 16:45:20.00
##
## [1] "2021-10-05" "2021-10-11" "2021-10-12" "2021-10-18" "2021-10-19"
Weather Data
get filenames:
Weather data for each capture day includes the following variables:
- date
- time
- temp_f = temperature in Fahrenheit
- wind_speed_mph = wind speed in miles per hr
- relative_humidity_percent = % relative humidity
- solar_rad_w_m2 = Watts per square meter of solar radiation
- precip_inches = inches of precipitation
read_weather_file <- function(filename) {
# read file
dat <- read.csv(file.path("./Data/Weather Data", filename),
header = TRUE, # each csv has headers
check.names = F,
sep = ";" #separate data via semicolons
)
# return the dataframe for that single csv file
dat
}Apply function to all dataframes and compile:
# apply function to get data from all csvs
all_weather_data <- lapply(filenames_weather, read_weather_file) %>%
# paste all data files together into one df by row
reduce(rbind) %>%
# select only the relevant values
dplyr::select(date = "Date",
time = "Time",
temp_F = "Temperature (\xb0F)",
wind_speed_mph = "Wind Speed (ave) (mph)",
relative_humidity_percent = "Relative Humidity (% RH)",
solar_rad_W_m2 = "Pyranometer 0-2000 W/m\xb2 (W/m\xb2)",
precip_inches = "Precipitation II (inch)"
) %>%
# properly format data classes
dplyr::mutate(date_time = as.POSIXct(paste(date, time),
format = "%m/%d/%y %I:%M %p"),
date = as.Date(date, format = "%m/%d/%y"),
time = as.POSIXct(time, format = "%I:%M %p"),
temp_F = as.numeric(temp_F),
temp_C =(temp_F-32)*(5/9),
relative_humidity_percent = as.numeric(relative_humidity_percent),
solar_rad_W_m2 = as.numeric(solar_rad_W_m2),
wind_speed_mph = as.numeric(wind_speed_mph),
precip_inches = as.numeric(precip_inches)
) %>%
dplyr::filter(complete.cases(temp_C, relative_humidity_percent))## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
## date time temp_F
## Min. :2021-10-05 Min. :2023-10-23 00:00:00.00 Min. :43.00
## 1st Qu.:2021-10-12 1st Qu.:2023-10-23 05:45:00.00 1st Qu.:54.50
## Median :2021-10-16 Median :2023-10-23 11:45:00.00 Median :58.90
## Mean :2021-10-16 Mean :2023-10-23 11:51:51.85 Mean :60.36
## 3rd Qu.:2021-10-21 3rd Qu.:2023-10-23 17:45:00.00 3rd Qu.:65.90
## Max. :2021-10-26 Max. :2023-10-23 23:45:00.00 Max. :88.90
## wind_speed_mph relative_humidity_percent solar_rad_W_m2 precip_inches
## Min. : 0.100 Min. : 12.10 Min. : 0.0 Min. :0.0000000
## 1st Qu.: 0.100 1st Qu.: 44.00 1st Qu.: 0.0 1st Qu.:0.0000000
## Median : 2.700 Median : 71.90 Median : 1.5 Median :0.0000000
## Mean : 2.758 Mean : 68.02 Mean :190.8 Mean :0.0009429
## 3rd Qu.: 4.400 3rd Qu.: 96.10 3rd Qu.:387.0 3rd Qu.:0.0000000
## Max. :12.100 Max. :100.00 Max. :879.7 Max. :0.2170000
## date_time temp_C
## Min. :2021-10-05 00:00:00.00 Min. : 6.111
## 1st Qu.:2021-10-12 08:52:30.00 1st Qu.:12.500
## Median :2021-10-16 18:00:00.00 Median :14.944
## Mean :2021-10-16 11:57:00.16 Mean :15.754
## 3rd Qu.:2021-10-21 03:00:00.00 3rd Qu.:18.833
## Max. :2021-10-26 00:00:00.00 Max. :31.611
Get times we want to know weather data for:
# get earliest and latest capture times for a given day
collection_data %>%
group_by(date) %>%
summarise(min(time_capture), max(time_capture))## # A tibble: 5 × 3
## date `min(time_capture)` `max(time_capture)`
## <date> <dttm> <dttm>
## 1 2021-10-05 2023-10-23 10:10:00 2023-10-23 11:40:00
## 2 2021-10-11 2023-10-23 10:10:00 2023-10-23 11:53:00
## 3 2021-10-12 2023-10-23 10:29:00 2023-10-23 16:00:00
## 4 2021-10-18 2023-10-23 10:18:00 2023-10-23 11:34:00
## 5 2021-10-19 2023-10-23 10:20:00 2023-10-23 11:58:00
# list times weather station has recordings, 1h before to 1h after captures for a given day
t1_days <- seq(as.POSIXct("2021-10-05 09:00"),
as.POSIXct("2021-10-05 13:00"), "15 min")
t2_days <- seq(as.POSIXct("2021-10-11 09:00"),
as.POSIXct("2021-10-11 13:00"), "15 min")
t3_days <- seq(as.POSIXct("2021-10-12 09:00"),
as.POSIXct("2021-10-12 17:00"), "15 min")
t4_days <- seq(as.POSIXct("2021-10-18 09:00"),
as.POSIXct("2021-10-18 13:00"), "15 min")
t5_days <- seq(as.POSIXct("2021-10-19 09:00"),
as.POSIXct("2021-10-19 13:00"), "15 min")
exp_dates <- data.frame(date_time = c(t1_days, t2_days, t3_days,
t4_days, t5_days)) %>%
dplyr::mutate(date = as.Date(substr(date_time, 1, 10)))
summary(exp_dates)## date_time date
## Min. :2021-10-05 09:00:00.00 Min. :2021-10-05
## 1st Qu.:2021-10-11 11:00:00.00 1st Qu.:2021-10-11
## Median :2021-10-12 13:00:00.00 Median :2021-10-12
## Mean :2021-10-13 07:51:05.34 Mean :2021-10-12
## 3rd Qu.:2021-10-18 11:00:00.00 3rd Qu.:2021-10-18
## Max. :2021-10-19 13:00:00.00 Max. :2021-10-19
subset weather data including ~1hr buffer before first and after last capture times for each measurement day:
weather_data_subset <- all_weather_data %>%
dplyr::filter(date_time %in% exp_dates$date_time)
summary(weather_data_subset)## date time temp_F
## Min. :2021-10-05 Min. :2023-10-23 09:00:00.00 Min. :54.40
## 1st Qu.:2021-10-11 1st Qu.:2023-10-23 10:15:00.00 1st Qu.:61.40
## Median :2021-10-12 Median :2023-10-23 11:30:00.00 Median :64.70
## Mean :2021-10-13 Mean :2023-10-23 11:42:48.65 Mean :64.63
## 3rd Qu.:2021-10-18 3rd Qu.:2023-10-23 12:45:00.00 3rd Qu.:66.80
## Max. :2021-10-19 Max. :2023-10-23 17:00:00.00 Max. :81.00
## wind_speed_mph relative_humidity_percent solar_rad_W_m2 precip_inches
## Min. : 0.100 Min. :19.50 Min. :289.2 Min. :0
## 1st Qu.: 3.900 1st Qu.:30.20 1st Qu.:508.1 1st Qu.:0
## Median : 5.000 Median :45.10 Median :669.2 Median :0
## Mean : 5.229 Mean :42.89 Mean :632.6 Mean :0
## 3rd Qu.: 6.700 3rd Qu.:51.80 3rd Qu.:766.0 3rd Qu.:0
## Max. :10.100 Max. :74.10 Max. :833.0 Max. :0
## date_time temp_C
## Min. :2021-10-05 09:00:00.00 Min. :12.44
## 1st Qu.:2021-10-11 12:30:00.00 1st Qu.:16.33
## Median :2021-10-12 14:00:00.00 Median :18.17
## Mean :2021-10-14 01:12:19.46 Mean :18.13
## 3rd Qu.:2021-10-18 11:30:00.00 3rd Qu.:19.33
## Max. :2021-10-19 13:00:00.00 Max. :27.22
Weather Models
Temperature, relative humidity, and wind speed were significantly different among capture days
Temperature
ANOVA:
weather_temp_aov <- aov(data = weather_data_subset,
temp_C ~ as.factor(date))
summary(weather_temp_aov)## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(date) 4 930.8 232.70 82.32 <2e-16 ***
## Residuals 180 508.8 2.83
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = temp_C ~ as.factor(date), data = weather_data_subset)
##
## $`as.factor(date)`
## diff lwr upr p adj
## 2021-10-11-2021-10-05 -5.7320261 -7.1082606 -4.355791711 0.0000000
## 2021-10-12-2021-10-05 -6.1604278 -7.4205535 -4.900302109 0.0000000
## 2021-10-18-2021-10-05 -8.8529412 -10.2291756 -7.476706744 0.0000000
## 2021-10-19-2021-10-05 -7.1470588 -8.5232933 -5.770824391 0.0000000
## 2021-10-12-2021-10-11 -0.4284017 -1.4064489 0.549645560 0.7473358
## 2021-10-18-2021-10-11 -3.1209150 -4.2446057 -1.997224324 0.0000000
## 2021-10-19-2021-10-11 -1.4150327 -2.5387234 -0.291341971 0.0058066
## 2021-10-18-2021-10-12 -2.6925134 -3.6705606 -1.714466146 0.0000000
## 2021-10-19-2021-10-12 -0.9866310 -1.9646782 -0.008583793 0.0468755
## 2021-10-19-2021-10-18 1.7058824 0.5821916 2.829573062 0.0004273
Figure:
ggplot(data = weather_data_subset) +
aes(x = as.factor(date),
y = temp_C,
color = as.factor(date)) +
geom_boxplot(size = 1) +
geom_jitter(size = 0.6,
alpha = 0.6) +
theme_classic() +
xlab("Date") +
ylab("Average Temperature During Capture (C)") +
#annotate("text", x = 3, y = 22, label = "example", size = 6) +
scale_color_brewer(palette = "Set2") +
theme(text = element_text(color = "black",
family = "sans",
size = 18),
axis.text = element_text(color = "black",
family = "sans",
size = 14),
legend.text.align = 0,
legend.position = "none") -> weather_temp_fig
weather_temp_figRelative Humidity
NOTE: will need to convert to VPD if we intend to add this to the LMM, but it’s more likely we will just incorporate via capture_date random effect.
weather_humid_aov <- aov(data = weather_data_subset,
relative_humidity_percent ~ as.factor(date))
summary(weather_humid_aov)## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(date) 4 22366 5591 71.91 <2e-16 ***
## Residuals 180 13996 78
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = relative_humidity_percent ~ as.factor(date), data = weather_data_subset)
##
## $`as.factor(date)`
## diff lwr upr p adj
## 2021-10-11-2021-10-05 -14.541176 -21.759105 -7.323248 0.0000010
## 2021-10-12-2021-10-05 -24.974510 -31.583484 -18.365536 0.0000000
## 2021-10-18-2021-10-05 2.582353 -4.635576 9.800282 0.8614006
## 2021-10-19-2021-10-05 -5.670588 -12.888517 1.547340 0.1979082
## 2021-10-12-2021-10-11 -10.433333 -15.562892 -5.303775 0.0000008
## 2021-10-18-2021-10-11 17.123529 11.230115 23.016943 0.0000000
## 2021-10-19-2021-10-11 8.870588 2.977174 14.764002 0.0004917
## 2021-10-18-2021-10-12 27.556863 22.427304 32.686421 0.0000000
## 2021-10-19-2021-10-12 19.303922 14.174363 24.433480 0.0000000
## 2021-10-19-2021-10-18 -8.252941 -14.146355 -2.359527 0.0014758
ggplot(data = weather_data_subset) +
aes(x = as.factor(date),
y = relative_humidity_percent,
color = as.factor(date)) +
geom_boxplot(size = 1) +
geom_jitter(size = 0.6,
alpha = 0.6) +
theme_classic() +
xlab("Date") +
ylab("Average Relative Humidity During Capture (%)") +
#annotate("text", x = 3, y = 22, label = "example", size = 6) +
scale_color_brewer(palette = "Set2") +
theme(text = element_text(color = "black",
family = "sans",
size = 18),
axis.text = element_text(color = "black",
family = "sans",
size = 14),
legend.text.align = 0,
legend.position = "none") -> weather_humid_fig
weather_humid_figSolar Radiation
weather_sorad_aov <- aov(data = weather_data_subset,
solar_rad_W_m2 ~ as.factor(date))
summary(weather_sorad_aov)## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(date) 4 15183 3796 0.158 0.959
## Residuals 180 4314164 23968
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = solar_rad_W_m2 ~ as.factor(date), data = weather_data_subset)
##
## $`as.factor(date)`
## diff lwr upr p adj
## 2021-10-11-2021-10-05 18.864706 -107.85777 145.58719 0.9940100
## 2021-10-12-2021-10-05 27.290553 -88.74074 143.32184 0.9668058
## 2021-10-18-2021-10-05 7.264706 -119.45777 133.98719 0.9998590
## 2021-10-19-2021-10-05 16.611765 -110.11072 143.33424 0.9963337
## 2021-10-12-2021-10-11 8.425847 -81.63190 98.48359 0.9990194
## 2021-10-18-2021-10-11 -11.600000 -115.06847 91.86847 0.9980058
## 2021-10-19-2021-10-11 -2.252941 -105.72141 101.21553 0.9999970
## 2021-10-18-2021-10-12 -20.025847 -110.08359 70.03190 0.9729271
## 2021-10-19-2021-10-12 -10.678788 -100.73653 79.37896 0.9975178
## 2021-10-19-2021-10-18 9.347059 -94.12141 112.81553 0.9991459
ggplot(data = weather_data_subset) +
aes(x = as.factor(date),
y = solar_rad_W_m2 ,
color = as.factor(date)) +
geom_boxplot(size = 1) +
geom_jitter(size = 0.6,
alpha = 0.6) +
theme_classic() +
xlab("Date") +
ylab("Average Solar Radiation During Capture (W/m2)") +
#annotate("text", x = 3, y = 22, label = "example", size = 6) +
scale_color_brewer(palette = "Set2") +
theme(text = element_text(color = "black",
family = "sans",
size = 18),
axis.text = element_text(color = "black",
family = "sans",
size = 14),
legend.text.align = 0,
legend.position = "none") -> weather_solar_fig
weather_solar_figWind Speed
weather_wind_aov <- aov(data = weather_data_subset,
wind_speed_mph ~ as.factor(date))
summary(weather_wind_aov)## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(date) 4 401.1 100.28 60.84 <2e-16 ***
## Residuals 180 296.7 1.65
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = wind_speed_mph ~ as.factor(date), data = weather_data_subset)
##
## $`as.factor(date)`
## diff lwr upr p adj
## 2021-10-11-2021-10-05 4.4529412 3.402010646 5.5038717 0.0000000
## 2021-10-12-2021-10-05 2.7891266 1.826859858 3.7513933 0.0000000
## 2021-10-18-2021-10-05 4.2470588 3.196128294 5.2979894 0.0000000
## 2021-10-19-2021-10-05 1.0529412 0.002010646 2.1038717 0.0493042
## 2021-10-12-2021-10-11 -1.6638146 -2.410678423 -0.9169508 0.0000001
## 2021-10-18-2021-10-11 -0.2058824 -1.063963537 0.6521988 0.9643335
## 2021-10-19-2021-10-11 -3.4000000 -4.258081185 -2.5419188 0.0000000
## 2021-10-18-2021-10-12 1.4579323 0.711068458 2.2047961 0.0000023
## 2021-10-19-2021-10-12 -1.7361854 -2.483049189 -0.9893216 0.0000000
## 2021-10-19-2021-10-18 -3.1941176 -4.052198832 -2.3360365 0.0000000
ggplot(data = weather_data_subset) +
aes(x = as.factor(date),
y = wind_speed_mph ,
color = as.factor(date)) +
geom_boxplot(size = 1) +
geom_jitter(size = 0.6,
alpha = 0.6) +
theme_classic() +
xlab("Date") +
ylab("Average Wind Speed During Capture (mph)") +
#annotate("text", x = 3, y = 22, label = "example", size = 6) +
scale_color_brewer(palette = "Set2") +
theme(text = element_text(color = "black",
family = "sans",
size = 18),
axis.text = element_text(color = "black",
family = "sans",
size = 14),
legend.text.align = 0,
legend.position = "none") -> weather_wind_fig
weather_wind_figThermocouple Data
filenames:
# make a list of file names of all data to load in
filenames_thermocouple <- list.files(path = "./Data/Thermocouple data",
pattern = ".csv")Thermocouple variables that we will load in with each file:
- date
- time
- lizard ID
- value_cloacal = cloacal temperature in Celcius
- thermocouple_cloacal = label of cloacal thermocouple
- value_dorsal = dorsal temperature in Celcius
- thermocouple_dorsal = label of dorsal thermocouple
read_thermocouple_files <- function(filename) {
# load file
dat <- read.csv(file.path("./Data/Thermocouple data",
filename),
header = TRUE # each csv has headers
) %>%
# select only the relevant values
dplyr::select(date = 'Date',
time = 'Time',
lizard_ID = 'Lizard',
value_cloacal = 'Value_Cloacal',
thermocouple_cloacal = 'Thermocouple_Cloacal',
value_dorsal = 'Value_Dorsal',
thermocouple_dorsal = 'Thermocouple_Dorsal'
)
# return the dataframe for that single csv file
dat
}apply function and format variables:
# apply function to get data from all csvs
all_thermocouple_data <- lapply(filenames_thermocouple,
read_thermocouple_files) %>%
# paste all data files together into one df by row
reduce(rbind) %>%
# properly format data classes
mutate(date_time = as.POSIXct(paste(date, time),
format = "%m/%d/%Y %H:%M:%S"),
date = as.Date(date, format = "%m/%d/%Y"),
time = as.POSIXct(time, format = "%H:%M:%S"),
lizard_ID = as.factor(lizard_ID),
thermocouple_cloacal = as.factor(thermocouple_cloacal),
thermocouple_dorsal = as.factor(thermocouple_dorsal),
value_cloacal = as.numeric(value_cloacal),
value_dorsal = as.numeric(value_dorsal)
) %>%
dplyr::filter(complete.cases(value_cloacal, value_dorsal)) %>%
# need to get rid of non-measurements when TC not plugged in, temp = 9999
dplyr::filter(value_dorsal < 50) %>%
dplyr::filter(value_cloacal < 50)
summary(all_thermocouple_data)## date time lizard_ID
## Min. :2021-10-05 Min. :2023-10-23 13:17:44.00 406 : 1886
## 1st Qu.:2021-10-11 1st Qu.:2023-10-23 14:31:25.25 407 : 1009
## Median :2021-10-12 Median :2023-10-23 15:22:32.00 404 : 1003
## Mean :2021-10-12 Mean :2023-10-23 15:26:16.27 436 : 976
## 3rd Qu.:2021-10-18 3rd Qu.:2023-10-23 16:20:06.75 430 : 969
## Max. :2021-10-19 Max. :2023-10-23 17:48:51.00 425 : 964
## (Other):28847
## value_cloacal thermocouple_cloacal value_dorsal thermocouple_dorsal
## Min. :11.80 E :9326 Min. :12.50 A :7651
## 1st Qu.:20.60 D :5778 1st Qu.:22.20 F :6477
## Median :25.20 B :5696 Median :25.30 D :5667
## Mean :24.51 F :5595 Mean :24.95 G :3756
## 3rd Qu.:27.40 C :3633 3rd Qu.:27.30 E :3740
## Max. :38.00 A :2820 Max. :36.30 H :3633
## (Other):2806 (Other):4730
## date_time
## Min. :2021-10-05 13:17:44.00
## 1st Qu.:2021-10-11 14:12:38.25
## Median :2021-10-12 15:19:27.50
## Mean :2021-10-13 15:21:52.15
## 3rd Qu.:2021-10-18 17:01:30.75
## Max. :2021-10-19 16:45:25.00
##
TC Calibration Data
The thermocouples tend to drift 1-2 C off of the true temperature, so we collected reference data to make a calibration curve for each one.
Get all the calibration data:
TCD1 <- read.csv("./Data/TC Calibration/thermocouple calibration data 1.csv")
TCD2 <- read.csv("./Data/TC Calibration/thermocouple calibration data 2.csv")
TCD3 <- read.csv("./Data/TC Calibration/thermocouple calibration data 3.csv")
TCR1 <- read.csv("./Data/TC Calibration/thermocouple_calibration_1_ref.csv") %>%
mutate(date = "9/28/21")
TCR2 <- read.csv("./Data/TC Calibration/thermocouple_calibration_2_ref.csv") %>%
mutate(date = "10/1/21")
TCR3 <- read.csv("./Data/TC Calibration/thermocouple_calibration_3_ref.csv") %>%
mutate(date = "1/8/22")
TC_key <- read.csv("./Data/TC Calibration/thermocouple_calibration_key.csv")Reorganize repeated columns into vertical form:
# thermocouple calibration data 1
TCD1.1 <- TCD1 %>% dplyr::select(date = Date,
time = Time,
temp_C = Value,
port = Unit)
TCD1.2 <- TCD1 %>% dplyr::select(date = Date,
time = Time,
temp_C = Value.1,
port = Unit.1)
TCD1.3 <- TCD1 %>% dplyr::select(date = Date,
time = Time,
temp_C = Value.2,
port = Unit.2)
# thermocouple calibration data 2
TCD2.1 <- TCD2 %>% dplyr::select(date = Date,
time = Time,
temp_C = Value,
port = Unit)
TCD2.2 <- TCD2 %>% dplyr::select(date = Date,
time = Time,
temp_C = Value.1,
port = Unit.1)
TCD2.3 <- TCD2 %>% dplyr::select(date = Date,
time = Time,
temp_C = Value.2,
port = Unit.2)
TCD2.4 <- TCD2 %>% dplyr::select(date = Date,
time = Time,
temp_C = Value.3,
port = Unit.3)
# thermocouple calibration data 3
TCD3.1 <- TCD3 %>% dplyr::select(date = Date,
time = Time,
temp_C = Value,
port = Unit)
# format and combine
TC_ref <- TCR1 %>%
rbind(TCR2) %>%
rbind(TCR3) %>%
mutate(date = as.Date(date, format = "%m/%d/%y")) %>%
dplyr::filter(complete.cases(reference_temp))
TC_key_formatted <- TC_key %>%
mutate(date = as.Date(date, format = "%m/%d/%y"),
lead = as.factor(lead),
port = as.factor(port))
calibration_data <- TCD1.1 %>%
rbind(TCD1.2) %>%
rbind(TCD1.3) %>%
rbind(TCD2.1) %>%
rbind(TCD2.2) %>%
rbind(TCD2.3) %>%
rbind(TCD2.4) %>%
rbind(TCD3.1) %>%
mutate(date = as.Date(date, format = "%m/%d/%Y"),
time = substr(time, 1, 5),
port = as.factor(substr(port, 1, 2))
) %>%
left_join(TC_key_formatted, by = c("date", "port")) %>%
left_join(TC_ref, by = c("date", "time")) %>%
dplyr::filter(complete.cases(reference_temp))
summary(calibration_data)## date time temp_C port lead
## Min. :2021-09-28 Length:1004 Min. : 7.30 T1:366 B :151
## 1st Qu.:2021-09-28 Class :character 1st Qu.:16.30 T2:263 E :151
## Median :2021-10-01 Mode :character Median :25.00 T3:263 H :151
## Mean :2021-10-09 Mean :25.38 T4:112 A :112
## 3rd Qu.:2021-10-01 3rd Qu.:33.40 C :112
## Max. :2022-01-08 Max. :41.20 D :112
## (Other):215
## reference_temp
## Min. : 1.50
## 1st Qu.:15.00
## Median :23.50
## Mean :23.95
## 3rd Qu.:32.00
## Max. :40.00
##
Data Wrangling
CEWL Errors
Remove sections of points where the probe got disconnected:
lizard note 403 error @520, moved @650 405 error @320 & 660 406 error @250 & 850 408 broke seal ~170 411 large move @400 414 large move @660 426 Big move 436 move @~700
Lizard 403
This lizard’s data will NOT be included in analyses due to suspected Aquaflux failure to re-equibrilate.
all_CEWL_data %>%
dplyr::filter(lizard_ID == 403) %>%
ggplot(aes(x = time_sec,
y = CEWL_g_m2h)) +
geom_point() +
geom_vline(xintercept = 520) + # first error note
geom_vline(xintercept = 650) + # second error note
xlim(0, 900) + ylim(5,15)## Warning: Removed 77 rows containing missing values (`geom_point()`).
l403_remove <- c(500:560, 600:660)
l403_filtered <- all_CEWL_data %>%
dplyr::filter(lizard_ID == 403) %>%
dplyr::filter(floor(time_sec) %nin% l403_remove)
ggplot(data = l403_filtered,
aes(x = time_sec,
y = CEWL_g_m2h)) +
geom_point() +
geom_vline(xintercept = 520) + # first error note
geom_vline(xintercept = 650) + # second error note
xlim(60, 900) #+ ylim(5, 12)## Warning: Removed 60 rows containing missing values (`geom_point()`).
## time_sec CEWL_g_m2h msmt_temp_C msmt_RH_percent
## Min. : 0.0 Min. : 4.88 Min. :25.20 Min. :47.50
## 1st Qu.:194.6 1st Qu.: 8.22 1st Qu.:25.30 1st Qu.:47.90
## Median :389.1 Median :10.02 Median :25.30 Median :48.00
## Mean :429.9 Mean :11.05 Mean :25.32 Mean :47.95
## 3rd Qu.:706.0 3rd Qu.:12.48 3rd Qu.:25.40 3rd Qu.:48.10
## Max. :900.6 Max. :26.26 Max. :25.60 Max. :48.20
##
## date lizard_ID date_time_start
## Min. :2021-10-05 403 :765 Min. :2021-10-05 16:30:31
## 1st Qu.:2021-10-05 401 : 0 1st Qu.:2021-10-05 16:30:31
## Median :2021-10-05 402 : 0 Median :2021-10-05 16:30:31
## Mean :2021-10-05 404 : 0 Mean :2021-10-05 16:30:31
## 3rd Qu.:2021-10-05 405 : 0 3rd Qu.:2021-10-05 16:30:31
## Max. :2021-10-05 406 : 0 Max. :2021-10-05 16:30:31
## (Other): 0
## date_time
## Min. :2021-10-05 16:30:31.00
## 1st Qu.:2021-10-05 16:33:45.00
## Median :2021-10-05 16:37:00.00
## Mean :2021-10-05 16:37:40.45
## 3rd Qu.:2021-10-05 16:42:17.00
## Max. :2021-10-05 16:45:31.00
##
Lizard 405
This lizard’s data will NOT be included in analyses due to suspected Aquaflux failure to re-equibrilate.
all_CEWL_data %>%
dplyr::filter(lizard_ID == 405) %>%
ggplot(aes(x = time_sec,
y = CEWL_g_m2h)) +
geom_point() +
geom_vline(xintercept = 320) + # error 1
geom_vline(xintercept = 660) + # error 2
xlim(0,900) + ylim(0,15)## Warning: Removed 63 rows containing missing values (`geom_point()`).
l405_remove <- c(297:357, 635:695)
l405_filtered <- all_CEWL_data %>%
dplyr::filter(lizard_ID == 405) %>%
dplyr::filter(floor(time_sec) %nin% l405_remove)
ggplot(data = l405_filtered,
aes(x = time_sec,
y = CEWL_g_m2h)) +
geom_point() +
geom_vline(xintercept = 320) +
geom_vline(xintercept = 660) +
xlim(60, 900) + ylim(0, 12)## Warning: Removed 107 rows containing missing values (`geom_point()`).
## time_sec CEWL_g_m2h msmt_temp_C msmt_RH_percent
## Min. : 0.0 Min. : 2.630 Min. :25.40 Min. :46.10
## 1st Qu.:194.6 1st Qu.: 4.160 1st Qu.:25.40 1st Qu.:46.40
## Median :450.2 Median : 6.330 Median :25.50 Median :46.60
## Mean :442.9 Mean : 7.324 Mean :25.47 Mean :46.54
## 3rd Qu.:705.8 3rd Qu.: 8.200 3rd Qu.:25.50 3rd Qu.:46.70
## Max. :900.3 Max. :21.190 Max. :25.50 Max. :46.80
##
## date lizard_ID date_time_start
## Min. :2021-10-05 405 :765 Min. :2021-10-05 15:25:40
## 1st Qu.:2021-10-05 401 : 0 1st Qu.:2021-10-05 15:25:40
## Median :2021-10-05 402 : 0 Median :2021-10-05 15:25:40
## Mean :2021-10-05 403 : 0 Mean :2021-10-05 15:25:40
## 3rd Qu.:2021-10-05 404 : 0 3rd Qu.:2021-10-05 15:25:40
## Max. :2021-10-05 406 : 0 Max. :2021-10-05 15:25:40
## (Other): 0
## date_time
## Min. :2021-10-05 15:25:40.00
## 1st Qu.:2021-10-05 15:28:54.00
## Median :2021-10-05 15:33:10.00
## Mean :2021-10-05 15:33:02.46
## 3rd Qu.:2021-10-05 15:37:25.00
## Max. :2021-10-05 15:40:40.00
##
Lizard 406
Will remove from 238 to 287 seconds, 49 seconds total, for beginning error and last 81 seconds (820 to 901 seconds) due to failure to re-equibrilate after second error.
all_CEWL_data %>%
dplyr::filter(lizard_ID == 406) %>%
ggplot(aes(x = time_sec,
y = CEWL_g_m2h)) +
geom_point() +
geom_vline(xintercept = 238) + # error 1
geom_vline(xintercept = 287) + # error 2
geom_hline(yintercept = 7.5) +
xlim(200, 900) + ylim(0, 12)## Warning: Removed 198 rows containing missing values (`geom_point()`).
l406_remove <- c(238:287, 820:901)
l406_filtered <- all_CEWL_data %>%
dplyr::filter(lizard_ID == 406) %>%
dplyr::filter(floor(time_sec) %nin% l406_remove)
ggplot(data = l406_filtered,
aes(x = time_sec,
y = CEWL_g_m2h)) +
geom_point() +
geom_vline(xintercept = 250) + # error 1
geom_vline(xintercept = 850) + # error 2
xlim(60, 900) + ylim(0, 12)## Warning: Removed 123 rows containing missing values (`geom_point()`).
## time_sec CEWL_g_m2h msmt_temp_C msmt_RH_percent
## Min. : 0.0 Min. : 1.380 Min. :25.3 Min. :46.50
## 1st Qu.:192.2 1st Qu.: 4.628 1st Qu.:25.4 1st Qu.:46.60
## Median :434.8 Median : 5.865 Median :25.4 Median :46.60
## Mean :419.2 Mean : 7.433 Mean :25.4 Mean :46.61
## 3rd Qu.:627.1 3rd Qu.: 8.020 3rd Qu.:25.4 3rd Qu.:46.70
## Max. :819.2 Max. :21.000 Max. :25.5 Max. :46.70
##
## date lizard_ID date_time_start
## Min. :2021-10-05 406 :756 Min. :2021-10-05 14:58:21
## 1st Qu.:2021-10-05 401 : 0 1st Qu.:2021-10-05 14:58:21
## Median :2021-10-05 402 : 0 Median :2021-10-05 14:58:21
## Mean :2021-10-05 403 : 0 Mean :2021-10-05 14:58:21
## 3rd Qu.:2021-10-05 404 : 0 3rd Qu.:2021-10-05 14:58:21
## Max. :2021-10-05 405 : 0 Max. :2021-10-05 14:58:21
## (Other): 0
## date_time
## Min. :2021-10-05 14:58:21.00
## 1st Qu.:2021-10-05 15:01:32.75
## Median :2021-10-05 15:05:35.50
## Mean :2021-10-05 15:05:19.78
## 3rd Qu.:2021-10-05 15:08:47.25
## Max. :2021-10-05 15:12:00.00
##
Lizard 408
Will remove one section from 144 to 222 seconds, 78 seconds total.
all_CEWL_data %>%
dplyr::filter(lizard_ID == 408) %>%
ggplot(aes(x = time_sec,
y = CEWL_g_m2h)) +
geom_point() +
geom_vline(xintercept = 144) +
geom_vline(xintercept = 222) + # error @170 when seal broke
geom_hline(yintercept = 26) +
xlim(100,300) + ylim(0,40)## Warning: Removed 689 rows containing missing values (`geom_point()`).
l408_remove <- c(144:222)
l408_filtered <- all_CEWL_data %>%
dplyr::filter(lizard_ID == 408) %>%
dplyr::filter(floor(time_sec) %nin% l408_remove)
ggplot(data = l408_filtered,
aes(x = time_sec,
y = CEWL_g_m2h)) +
geom_point() +
geom_vline(xintercept = 144) + # error @170 when seal broke
geom_vline(xintercept = 222) +
xlim(60, 900) + ylim(0, 35)## Warning: Removed 60 rows containing missing values (`geom_point()`).
## time_sec CEWL_g_m2h msmt_temp_C msmt_RH_percent
## Min. : 0.0 Min. : 1.11 Min. :25.60 Min. :46.70
## 1st Qu.:284.1 1st Qu.:19.98 1st Qu.:25.70 1st Qu.:47.00
## Median :489.4 Median :20.97 Median :25.70 Median :47.10
## Mean :475.7 Mean :21.46 Mean :25.72 Mean :47.14
## 3rd Qu.:695.0 3rd Qu.:23.27 3rd Qu.:25.70 3rd Qu.:47.30
## Max. :900.5 Max. :26.05 Max. :26.00 Max. :47.50
##
## date lizard_ID date_time_start
## Min. :2021-10-05 408 :808 Min. :2021-10-05 13:21:12
## 1st Qu.:2021-10-05 401 : 0 1st Qu.:2021-10-05 13:21:12
## Median :2021-10-05 402 : 0 Median :2021-10-05 13:21:12
## Mean :2021-10-05 403 : 0 Mean :2021-10-05 13:21:12
## 3rd Qu.:2021-10-05 404 : 0 3rd Qu.:2021-10-05 13:21:12
## Max. :2021-10-05 405 : 0 Max. :2021-10-05 13:21:12
## (Other): 0
## date_time
## Min. :2021-10-05 13:21:12.00
## 1st Qu.:2021-10-05 13:25:55.75
## Median :2021-10-05 13:29:21.00
## Mean :2021-10-05 13:29:07.26
## 3rd Qu.:2021-10-05 13:32:46.25
## Max. :2021-10-05 13:36:12.00
##
Lizard 411
Will remove one section from 301 to 415 seconds, 114 seconds total.
all_CEWL_data %>%
dplyr::filter(lizard_ID == 411) %>%
ggplot(aes(x = time_sec,
y = CEWL_g_m2h)) +
geom_point() +
geom_vline(xintercept = 301) +
geom_vline(xintercept = 415) + # large move at 400s
geom_hline(yintercept = 7) +
xlim(250,450) + ylim(0,10)## Warning: Removed 689 rows containing missing values (`geom_point()`).
l411_remove <- c(301:415)
l411_filtered <- all_CEWL_data %>%
dplyr::filter(lizard_ID == 411) %>%
dplyr::filter(floor(time_sec) %nin% l411_remove)
ggplot(data = l411_filtered,
aes(x = time_sec,
y = CEWL_g_m2h)) +
geom_point() +
geom_vline(xintercept = 297) +
geom_vline(xintercept = 660) +
xlim(60, 900) + ylim(0, 12)## Warning: Removed 96 rows containing missing values (`geom_point()`).
## time_sec CEWL_g_m2h msmt_temp_C msmt_RH_percent
## Min. : 0.0 Min. : 4.230 Min. :25.30 Min. :32.50
## 1st Qu.:196.4 1st Qu.: 4.890 1st Qu.:25.50 1st Qu.:32.90
## Median :507.7 Median : 6.010 Median :25.50 Median :33.30
## Mean :463.8 Mean : 7.878 Mean :25.49 Mean :33.22
## 3rd Qu.:704.2 3rd Qu.: 7.610 3rd Qu.:25.50 3rd Qu.:33.50
## Max. :900.7 Max. :32.320 Max. :25.60 Max. :33.70
##
## date lizard_ID date_time_start
## Min. :2021-10-11 411 :773 Min. :2021-10-11 14:11:19
## 1st Qu.:2021-10-11 401 : 0 1st Qu.:2021-10-11 14:11:19
## Median :2021-10-11 402 : 0 Median :2021-10-11 14:11:19
## Mean :2021-10-11 403 : 0 Mean :2021-10-11 14:11:19
## 3rd Qu.:2021-10-11 404 : 0 3rd Qu.:2021-10-11 14:11:19
## Max. :2021-10-11 405 : 0 Max. :2021-10-11 14:11:19
## (Other): 0
## date_time
## Min. :2021-10-11 14:11:19.00
## 1st Qu.:2021-10-11 14:14:35.00
## Median :2021-10-11 14:19:46.00
## Mean :2021-10-11 14:19:02.33
## 3rd Qu.:2021-10-11 14:23:03.00
## Max. :2021-10-11 14:26:19.00
##
Lizard 414
Will remove last 246 seconds (655 to 901 seconds) due to failure to re-equibrilate after error.
all_CEWL_data %>%
dplyr::filter(lizard_ID == 414) %>%
ggplot(aes(x = time_sec,
y = CEWL_g_m2h)) +
geom_point() +
geom_vline(xintercept = 655) + # large move @660s
geom_vline(xintercept = 720) +
xlim(0,900) + ylim(0,15)## Warning: Removed 1 rows containing missing values (`geom_point()`).
l414_remove <- c(655:901)
l414_filtered <- all_CEWL_data %>%
dplyr::filter(lizard_ID == 414) %>%
dplyr::filter(floor(time_sec) %nin% l414_remove)
ggplot(data = l414_filtered,
aes(x = time_sec,
y = CEWL_g_m2h)) +
geom_point() +
geom_vline(xintercept = 660) + # large move @660s
xlim(60, 900) + ylim(0, 12)## Warning: Removed 59 rows containing missing values (`geom_point()`).
## time_sec CEWL_g_m2h msmt_temp_C msmt_RH_percent
## Min. : 0.0 Min. : 1.510 Min. :25.60 Min. :33.50
## 1st Qu.:163.6 1st Qu.: 4.330 1st Qu.:25.70 1st Qu.:33.60
## Median :327.3 Median : 4.735 Median :25.70 Median :33.70
## Mean :327.3 Mean : 5.530 Mean :25.73 Mean :33.68
## 3rd Qu.:490.9 3rd Qu.: 5.350 3rd Qu.:25.80 3rd Qu.:33.70
## Max. :654.5 Max. :13.740 Max. :25.90 Max. :33.80
##
## date lizard_ID date_time_start
## Min. :2021-10-11 414 :644 Min. :2021-10-11 16:57:39
## 1st Qu.:2021-10-11 401 : 0 1st Qu.:2021-10-11 16:57:39
## Median :2021-10-11 402 : 0 Median :2021-10-11 16:57:39
## Mean :2021-10-11 403 : 0 Mean :2021-10-11 16:57:39
## 3rd Qu.:2021-10-11 404 : 0 3rd Qu.:2021-10-11 16:57:39
## Max. :2021-10-11 405 : 0 Max. :2021-10-11 16:57:39
## (Other): 0
## date_time
## Min. :2021-10-11 16:57:39.00
## 1st Qu.:2021-10-11 17:00:21.75
## Median :2021-10-11 17:03:05.50
## Mean :2021-10-11 17:03:05.83
## 3rd Qu.:2021-10-11 17:05:49.25
## Max. :2021-10-11 17:08:33.00
##
Lizard 426
Will remove 404 seconds (497 to 901 seconds) due to failure to re-equibrilate after error.
all_CEWL_data %>%
dplyr::filter(lizard_ID == 426) %>%
ggplot(aes(x = time_sec,
y = CEWL_g_m2h)) +
geom_point() +
geom_vline(xintercept = 497) +
geom_vline(xintercept = 630) +
xlim(400, 900) + ylim(0,15)## Warning: Removed 394 rows containing missing values (`geom_point()`).
l426_remove <- c(497:901)
l426_filtered <- all_CEWL_data %>%
dplyr::filter(lizard_ID == 426) %>%
dplyr::filter(floor(time_sec) %nin% l426_remove)
ggplot(data = l426_filtered,
aes(x = time_sec,
y = CEWL_g_m2h)) +
geom_point() +
geom_vline(xintercept = 297) +
geom_vline(xintercept = 660) +
xlim(60, 900) + ylim(0, 12)## Warning: Removed 59 rows containing missing values (`geom_point()`).
## time_sec CEWL_g_m2h msmt_temp_C msmt_RH_percent
## Min. : 0.0 Min. : 1.060 Min. :25.10 Min. :33.60
## 1st Qu.:124.2 1st Qu.: 2.960 1st Qu.:25.20 1st Qu.:33.70
## Median :248.4 Median : 3.640 Median :25.20 Median :33.70
## Mean :248.3 Mean : 5.276 Mean :25.18 Mean :33.74
## 3rd Qu.:372.4 3rd Qu.: 6.327 3rd Qu.:25.20 3rd Qu.:33.80
## Max. :496.5 Max. :17.200 Max. :25.30 Max. :33.90
##
## date lizard_ID date_time_start
## Min. :2021-10-18 426 :488 Min. :2021-10-18 15:08:34
## 1st Qu.:2021-10-18 401 : 0 1st Qu.:2021-10-18 15:08:34
## Median :2021-10-18 402 : 0 Median :2021-10-18 15:08:34
## Mean :2021-10-18 403 : 0 Mean :2021-10-18 15:08:34
## 3rd Qu.:2021-10-18 404 : 0 3rd Qu.:2021-10-18 15:08:34
## Max. :2021-10-18 405 : 0 Max. :2021-10-18 15:08:34
## (Other): 0
## date_time
## Min. :2021-10-18 15:08:34.00
## 1st Qu.:2021-10-18 15:10:37.75
## Median :2021-10-18 15:12:41.50
## Mean :2021-10-18 15:12:41.86
## 3rd Qu.:2021-10-18 15:14:46.25
## Max. :2021-10-18 15:16:50.00
##
Lizard 436
Will remove 201 seconds (700 to 901 seconds) due to failure to re-equibrilate after error.
all_CEWL_data %>%
dplyr::filter(lizard_ID == 436) %>%
ggplot(aes(x = time_sec,
y = CEWL_g_m2h)) +
geom_point() +
geom_vline(xintercept = 700) + # moved here
geom_vline(xintercept = 820) +
xlim(0, 900) + ylim(15, 30)## Warning: Removed 12 rows containing missing values (`geom_point()`).
l436_remove <- c(700:901)
l436_filtered <- all_CEWL_data %>%
dplyr::filter(lizard_ID == 436) %>%
dplyr::filter(floor(time_sec) %nin% l436_remove)
ggplot(data = l436_filtered,
aes(x = time_sec,
y = CEWL_g_m2h)) +
geom_point() +
geom_vline(xintercept = 297) +
geom_vline(xintercept = 660) +
xlim(0, 900) + ylim(0, 30)## time_sec CEWL_g_m2h msmt_temp_C msmt_RH_percent
## Min. : 0.0 Min. : 0.56 Min. :24.00 Min. :35.40
## 1st Qu.:174.9 1st Qu.:21.53 1st Qu.:24.40 1st Qu.:35.60
## Median :349.9 Median :22.29 Median :24.50 Median :35.70
## Mean :349.8 Mean :21.87 Mean :24.45 Mean :35.77
## 3rd Qu.:524.9 3rd Qu.:22.82 3rd Qu.:24.50 3rd Qu.:35.90
## Max. :699.5 Max. :23.81 Max. :24.60 Max. :36.80
##
## date lizard_ID date_time_start
## Min. :2021-10-19 436 :688 Min. :2021-10-19 15:21:03
## 1st Qu.:2021-10-19 401 : 0 1st Qu.:2021-10-19 15:21:03
## Median :2021-10-19 402 : 0 Median :2021-10-19 15:21:03
## Mean :2021-10-19 403 : 0 Mean :2021-10-19 15:21:03
## 3rd Qu.:2021-10-19 404 : 0 3rd Qu.:2021-10-19 15:21:03
## Max. :2021-10-19 405 : 0 Max. :2021-10-19 15:21:03
## (Other): 0
## date_time
## Min. :2021-10-19 15:21:03.00
## 1st Qu.:2021-10-19 15:23:57.75
## Median :2021-10-19 15:26:52.50
## Mean :2021-10-19 15:26:52.41
## 3rd Qu.:2021-10-19 15:29:47.25
## Max. :2021-10-19 15:32:42.00
##
error summary Two lizards (403 and 405) will be completely removed due to suspected Aquaflux failure to re-equibrilate. Six lizards had spikes noted at the time of data collection that were caused by the AquaFlux seal being broken, and we removed those so they will not be considered in our data analyses. Of those six noted spike errors, three lizards had last remaining portion of data removed due to suspected Aquaflux failure to re-equibrilate.
Put Error-Removed Data Back
Data for lizards 403 and 405 will be removed completely, and the rest will have their cleaned-up data added back into the dataframe.
err_lizards <- c(403, 405, 406, 408, 411, 414, 426, 436)
filtered1_CEWL_data <- all_CEWL_data %>%
dplyr::filter(lizard_ID %nin% err_lizards) %>%
rbind(l406_filtered) %>%
rbind(l408_filtered) %>%
rbind(l411_filtered) %>%
rbind(l414_filtered) %>%
rbind(l426_filtered) %>%
rbind(l436_filtered) #%>%
#mutate(lizard_ID = as.factor(lizard_ID))
summary(filtered1_CEWL_data)## time_sec CEWL_g_m2h msmt_temp_C msmt_RH_percent
## Min. : 0.0 Min. : 0.03 Min. :24.00 Min. :21.10
## 1st Qu.:218.9 1st Qu.: 7.86 1st Qu.:24.80 1st Qu.:32.30
## Median :441.0 Median :13.27 Median :25.20 Median :33.60
## Mean :442.7 Mean :13.76 Mean :25.14 Mean :34.36
## 3rd Qu.:663.2 3rd Qu.:18.66 3rd Qu.:25.50 3rd Qu.:36.50
## Max. :901.0 Max. :37.40 Max. :26.00 Max. :48.10
##
## date lizard_ID date_time_start
## Min. :2021-10-05 417 : 887 Min. :2021-10-05 13:21:12.00
## 1st Qu.:2021-10-11 409 : 886 1st Qu.:2021-10-11 15:11:30.00
## Median :2021-10-12 410 : 886 Median :2021-10-12 15:37:50.00
## Mean :2021-10-13 412 : 886 Mean :2021-10-14 02:09:35.76
## 3rd Qu.:2021-10-18 413 : 886 3rd Qu.:2021-10-18 17:01:43.00
## Max. :2021-10-19 415 : 886 Max. :2021-10-19 16:30:20.00
## (Other):26292
## date_time
## Min. :2021-10-05 13:21:12.00
## 1st Qu.:2021-10-11 15:15:47.00
## Median :2021-10-12 15:44:59.00
## Mean :2021-10-14 02:16:58.02
## 3rd Qu.:2021-10-18 17:14:31.00
## Max. :2021-10-19 16:45:20.00
##
## [1] 401 402 404 407 409 410 412 413 415 416 417 418 419 420 421 422 423 424 425
## [20] 427 428 429 430 431 432 433 434 435 437 438 439 406 408 411 414 426 436
## 39 Levels: 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 ... 439
CEWL Equilibration
The evaporimeter also takes 1-2 minutes to equilibrate to the lizard at the start of each time series, so we want to estimate when equilibration was complete for each lizard.
ggplot(data = filtered1_CEWL_data,
aes(x = time_sec,
y = CEWL_g_m2h,
color = lizard_ID)) +
geom_point(size = 0.5, alpha = 0.5) +
geom_vline(xintercept = 60) +
geom_vline(xintercept = 120) +
xlim(0, 200)## Warning: Removed 24375 rows containing missing values (`geom_point()`).
# find max beginning value
start_peaks <- filtered1_CEWL_data %>%
dplyr::filter(time_sec < 80) %>%
group_by(lizard_ID) %>%
mutate(peak_CEWL = max(CEWL_g_m2h)) %>%
dplyr::filter(peak_CEWL == CEWL_g_m2h) %>%
# dplyr::filter(lizard_ID != 428) %>% # I think this guy just KEPT INCREASING
# BUT filtering out lizards RN makes for-loop not work
group_by(lizard_ID) %>%
summarise(peak_time = max(time_sec))
summary(start_peaks)## lizard_ID peak_time
## 401 : 1 Min. : 6.10
## 402 : 1 1st Qu.:20.40
## 404 : 1 Median :24.40
## 406 : 1 Mean :27.09
## 407 : 1 3rd Qu.:29.50
## 408 : 1 Max. :79.40
## (Other):31
Apply peak times to filter out evaporimeter equilibration, remove 60 seconds following peak CEWL during AquaFlux equibrilation to ensure Aquaflux reaches true equilibrium.
# initialize empty df
filtered2_df <- data.frame()
# lizard = lizard_ID & list =
for (lizard in unique(filtered1_CEWL_data$lizard_ID)) {
# get all data for each lizard
this_lizard <- filtered1_CEWL_data %>%
dplyr::filter(lizard_ID == lizard)
# get time of beginning peak
this_liz_peak <- start_peaks %>%
dplyr::filter(lizard_ID == lizard)
# calculate n seconds after peak
end <- floor(this_liz_peak$peak_time + 60) # +60 seconds to time of max peak
# make a list of the seconds to remove
remove <- c(0:end)
# clean each lizard's data
this_liz_cleaned <- this_lizard %>%
dplyr::filter(floor(time_sec) %nin% remove)
# append to df
filtered2_df <- filtered2_df %>%
rbind(this_liz_cleaned)
# should compile themselves into filtered_df
# which will be ready to use after running this loop
}Check that the dataframe was compiled with ALL the lizards and that the plot looks better now:
## time_sec CEWL_g_m2h msmt_temp_C msmt_RH_percent
## Min. : 67.3 Min. : 0.11 Min. :24.10 Min. :21.20
## 1st Qu.:286.4 1st Qu.: 7.38 1st Qu.:24.80 1st Qu.:32.30
## Median :485.1 Median :13.00 Median :25.20 Median :33.60
## Mean :487.6 Mean :13.43 Mean :25.13 Mean :34.39
## 3rd Qu.:686.5 3rd Qu.:18.29 3rd Qu.:25.50 3rd Qu.:36.50
## Max. :901.0 Max. :31.07 Max. :26.00 Max. :48.10
##
## date lizard_ID date_time_start
## Min. :2021-10-05 407 : 819 Min. :2021-10-05 13:21:12.00
## 1st Qu.:2021-10-11 404 : 813 1st Qu.:2021-10-11 15:11:30.00
## Median :2021-10-12 412 : 809 Median :2021-10-12 15:37:50.00
## Mean :2021-10-13 425 : 807 Mean :2021-10-14 01:38:41.63
## 3rd Qu.:2021-10-18 410 : 806 3rd Qu.:2021-10-18 17:01:43.00
## Max. :2021-10-19 424 : 806 Max. :2021-10-19 16:30:20.00
## (Other):23545
## date_time
## Min. :2021-10-05 13:22:41.00
## 1st Qu.:2021-10-11 15:16:21.00
## Median :2021-10-12 15:45:10.00
## Mean :2021-10-14 01:46:48.80
## 3rd Qu.:2021-10-18 17:13:48.00
## Max. :2021-10-19 16:45:20.00
##
## [1] 401 402 404 407 409 410 412 413 415 416 417 418 419 420 421 422 423 424 425
## [20] 427 428 429 430 431 432 433 434 435 437 438 439 406 408 411 414 426 436
## 39 Levels: 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 ... 439
ggplot(data = filtered2_df,
aes(x = time_sec,
y = CEWL_g_m2h,
color = lizard_ID)) +
geom_point(size = 0.1, alpha = 0.5) +
geom_vline(xintercept = 60) +
geom_vline(xintercept = 120)DF looks complete and relative change in CEWL looks much more reasonable now that the equilibration period was removed.
Calibrate Thermocouple Data
First, calculate and save regression curves:
# calculate regression curves
regression_a <- lm(data = calibration_data[calibration_data$lead == "A",],
temp_C ~ reference_temp)
regression_b <- lm(data = calibration_data[calibration_data$lead == "B",],
temp_C ~ reference_temp)
regression_c <- lm(data = calibration_data[calibration_data$lead == "C",],
temp_C ~ reference_temp)
regression_d <- lm(data = calibration_data[calibration_data$lead == "D",],
temp_C ~ reference_temp)
regression_e <- lm(data = calibration_data[calibration_data$lead == "E",],
temp_C ~ reference_temp)
regression_f <- lm(data = calibration_data[calibration_data$lead == "F",],
temp_C ~ reference_temp)
regression_g <- lm(data = calibration_data[calibration_data$lead == "G",],
temp_C ~ reference_temp)
regression_h <- lm(data = calibration_data[calibration_data$lead == "H",],
temp_C ~ reference_temp)
# create df to store calibration equations
TC_calibration <- data.frame(TC_A = (coef(regression_a))) %>%
`rownames<-`(c("intercept", "slope"))
TC_calibration$TC_B <- coef(regression_b)
TC_calibration$TC_C <- coef(regression_c)
TC_calibration$TC_D <- coef(regression_d)
TC_calibration$TC_E <- coef(regression_e)
TC_calibration$TC_F <- coef(regression_f)
TC_calibration$TC_G <- coef(regression_g)
TC_calibration$TC_H <- coef(regression_h)
TC_calibration## TC_A TC_B TC_C TC_D TC_E TC_F TC_G
## intercept 1.6231526 1.6611910 1.5559682 1.7000154 1.6606398 1.6289434 3.1508939
## slope 0.9890007 0.9814998 0.9943169 0.9896303 0.9869329 0.9907762 0.9756173
## TC_H
## intercept 1.8051208
## slope 0.9646041
Next, use calibration curves to correct the data we measured:
# this applies respective calibration equation
# to all cloacal thermocouple data points of that proper thermocouple
# 2=slope & 1=intercept
calibrated_TC_data <- all_thermocouple_data %>%
# do for cloacal thermocouple temps
mutate(calibrated_cloacal_temp = case_when(
thermocouple_cloacal == "A" ~ value_cloacal*TC_calibration$TC_A[2] +
TC_calibration$TC_A[1],
thermocouple_cloacal == "B" ~ value_cloacal*TC_calibration$TC_B[2] +
TC_calibration$TC_B[1],
thermocouple_cloacal == "C" ~ value_cloacal*TC_calibration$TC_C[2] +
TC_calibration$TC_C[1],
thermocouple_cloacal == "D" ~ value_cloacal*TC_calibration$TC_D[2] +
TC_calibration$TC_D[1],
thermocouple_cloacal == "E" ~ value_cloacal*TC_calibration$TC_E[2] +
TC_calibration$TC_E[1],
thermocouple_cloacal == "F" ~ value_cloacal*TC_calibration$TC_F[2] +
TC_calibration$TC_F[1],
thermocouple_cloacal == "G" ~ value_cloacal*TC_calibration$TC_G[2] +
TC_calibration$TC_G[1],
thermocouple_cloacal == "H" ~ value_cloacal*TC_calibration$TC_H[2] +
TC_calibration$TC_H[1]
),
# do for dorsal thermocouples also
calibrated_dorsal_temp = case_when(
thermocouple_dorsal == "A" ~ value_dorsal*TC_calibration$TC_A[2] +
TC_calibration$TC_A[1],
thermocouple_dorsal == "B" ~ value_dorsal*TC_calibration$TC_B[2] +
TC_calibration$TC_B[1],
thermocouple_dorsal == "C" ~ value_dorsal*TC_calibration$TC_C[2] +
TC_calibration$TC_C[1],
thermocouple_dorsal == "D" ~ value_dorsal*TC_calibration$TC_D[2] +
TC_calibration$TC_D[1],
thermocouple_dorsal == "E" ~ value_dorsal*TC_calibration$TC_E[2] +
TC_calibration$TC_E[1],
thermocouple_dorsal == "F" ~ value_dorsal*TC_calibration$TC_F[2] +
TC_calibration$TC_F[1],
thermocouple_dorsal == "G" ~ value_dorsal*TC_calibration$TC_G[2] +
TC_calibration$TC_G[1],
thermocouple_dorsal == "H" ~ value_dorsal*TC_calibration$TC_H[2] +
TC_calibration$TC_H[1]
)
)
summary(calibrated_TC_data)## date time lizard_ID
## Min. :2021-10-05 Min. :2023-10-23 13:17:44.00 406 : 1886
## 1st Qu.:2021-10-11 1st Qu.:2023-10-23 14:31:25.25 407 : 1009
## Median :2021-10-12 Median :2023-10-23 15:22:32.00 404 : 1003
## Mean :2021-10-12 Mean :2023-10-23 15:26:16.27 436 : 976
## 3rd Qu.:2021-10-18 3rd Qu.:2023-10-23 16:20:06.75 430 : 969
## Max. :2021-10-19 Max. :2023-10-23 17:48:51.00 425 : 964
## (Other):28847
## value_cloacal thermocouple_cloacal value_dorsal thermocouple_dorsal
## Min. :11.80 E :9326 Min. :12.50 A :7651
## 1st Qu.:20.60 D :5778 1st Qu.:22.20 F :6477
## Median :25.20 B :5696 Median :25.30 D :5667
## Mean :24.51 F :5595 Mean :24.95 G :3756
## 3rd Qu.:27.40 C :3633 3rd Qu.:27.30 E :3740
## Max. :38.00 A :2820 Max. :36.30 H :3633
## (Other):2806 (Other):4730
## date_time calibrated_cloacal_temp
## Min. :2021-10-05 13:17:44.00 Min. :13.86
## 1st Qu.:2021-10-11 14:12:38.25 1st Qu.:22.04
## Median :2021-10-12 15:19:27.50 Median :26.54
## Mean :2021-10-13 15:21:52.15 Mean :25.88
## 3rd Qu.:2021-10-18 17:01:30.75 3rd Qu.:28.80
## Max. :2021-10-19 16:45:25.00 Max. :38.96
##
## calibrated_dorsal_temp
## Min. :13.99
## 1st Qu.:23.72
## Median :26.81
## Mean :26.39
## 3rd Qu.:28.72
## Max. :37.59
##
Correct Thermocouple Times
The TC logger times were off by a few minutes compared to the evaporimeter, but we consistently started the evaporimeter immediately following the TC (with the exception of lizards 428, 437, 439 whose temperature data was started late and will be removed from the temperature dataset).
First, remove ~5 seconds from starting, then create time_sec variable:
TC_corrected_data <- calibrated_TC_data %>%
# remove lizards with messed up data
dplyr::filter(lizard_ID %nin% c(428, 437, 439)) %>%
group_by(lizard_ID) %>%
mutate(min_time = min(date_time)) %>%
# shift start time to more closely match that for CEWL
dplyr::filter(date_time > (min_time + 4)) %>%
group_by(lizard_ID) %>%
# put time in the same format at CEWL data
mutate(start_time = min(date_time),
time_sec = as.numeric(date_time - start_time)) %>%
# remove extra time for loggers stopped late, after the 15min period
dplyr::filter(time_sec <= (15*60))
summary(TC_corrected_data)## date time lizard_ID
## Min. :2021-10-05 Min. :2023-10-23 13:17:49.00 401 : 901
## 1st Qu.:2021-10-11 1st Qu.:2023-10-23 14:19:08.00 402 : 901
## Median :2021-10-12 Median :2023-10-23 15:17:36.50 403 : 901
## Mean :2021-10-12 Mean :2023-10-23 15:21:59.75 404 : 901
## 3rd Qu.:2021-10-18 3rd Qu.:2023-10-23 16:14:05.75 406 : 901
## Max. :2021-10-19 Max. :2023-10-23 17:48:22.00 407 : 901
## (Other):25228
## value_cloacal thermocouple_cloacal value_dorsal thermocouple_dorsal
## Min. :11.80 E :7208 Min. :12.5 A :6307
## 1st Qu.:20.60 B :5406 1st Qu.:22.2 D :5406
## Median :25.20 D :5406 Median :25.2 F :5406
## Mean :24.43 F :5406 Mean :24.9 E :3604
## 3rd Qu.:27.30 A :2703 3rd Qu.:27.3 G :3604
## Max. :37.50 C :1802 Max. :35.9 B :2703
## (Other):2703 (Other):3604
## date_time calibrated_cloacal_temp
## Min. :2021-10-05 13:17:49.00 Min. :14.06
## 1st Qu.:2021-10-11 14:16:05.25 1st Qu.:22.08
## Median :2021-10-12 14:55:29.00 Median :26.53
## Mean :2021-10-13 11:07:52.71 Mean :25.80
## 3rd Qu.:2021-10-18 15:53:44.75 3rd Qu.:28.68
## Max. :2021-10-19 16:24:08.00 Max. :38.47
##
## calibrated_dorsal_temp min_time
## Min. :13.99 Min. :2021-10-05 13:17:44.00
## 1st Qu.:23.82 1st Qu.:2021-10-11 14:08:30.00
## Median :26.89 Median :2021-10-12 14:47:54.00
## Mean :26.38 Mean :2021-10-13 11:00:17.71
## 3rd Qu.:28.62 3rd Qu.:2021-10-18 15:46:10.00
## Max. :37.05 Max. :2021-10-19 16:09:03.00
##
## start_time time_sec
## Min. :2021-10-05 13:17:49.00 Min. : 0
## 1st Qu.:2021-10-11 14:08:35.00 1st Qu.:225
## Median :2021-10-12 14:47:59.00 Median :450
## Mean :2021-10-13 11:00:22.71 Mean :450
## 3rd Qu.:2021-10-18 15:46:15.00 3rd Qu.:675
## Max. :2021-10-19 16:09:08.00 Max. :900
##
Good, each lizard should have up to 900 seconds of data, which is what we see in the histogram.
Summary Stats
Relative CEWL
starting_CEWL <- filtered2_df %>%
group_by(lizard_ID) %>%
mutate(start_time = min(time_sec)) %>%
dplyr::filter(time_sec == start_time) %>%
dplyr::select(lizard_ID,
starting_CEWL = CEWL_g_m2h)
head(starting_CEWL)## # A tibble: 6 × 2
## # Groups: lizard_ID [6]
## lizard_ID starting_CEWL
## <fct> <dbl>
## 1 401 28.5
## 2 402 25.5
## 3 404 25.5
## 4 407 28.3
## 5 409 21.2
## 6 410 7.58
## lizard_ID starting_CEWL
## 401 : 1 Min. : 4.64
## 402 : 1 1st Qu.: 9.54
## 404 : 1 Median :14.17
## 406 : 1 Mean :15.66
## 407 : 1 3rd Qu.:21.24
## 408 : 1 Max. :28.52
## (Other):31
Join Dataframes
Lizards 416 (not logged at all), 428, 437, and 439 (logger started very late), do not have thermocouple data, so they should be in the CEWL_time_series dataframe, but not the temp_time_series dataframe.
Lizards 403 and 405 have too many errors so they should have been removed completely and will not show up in either of the dataframes below:
CEWL_time_series <- filtered2_df %>%
mutate(time_sec = as.numeric(floor(time_sec))) %>%
left_join(collection_data,
by = c('date', "lizard_ID" = "individual_ID")) %>%
dplyr::select(time_sec, date,
lizard_ID, treatment,
mass_g, SVL_mm,
chamber_time_elapsed_hr,
CEWL_g_m2h,
msmt_temp_C, msmt_RH_percent) %>%
left_join(starting_CEWL, by = "lizard_ID") %>%
mutate(relative_CEWL = CEWL_g_m2h - starting_CEWL,
treatment = as.factor(treatment),
time_min = time_sec/60)
summary(CEWL_time_series)## time_sec date lizard_ID treatment
## Min. : 67.0 Min. :2021-10-05 407 : 819 Control: 9452
## 1st Qu.:286.0 1st Qu.:2021-10-11 404 : 813 Cooling: 8743
## Median :485.0 Median :2021-10-12 412 : 809 Heating:10210
## Mean :487.2 Mean :2021-10-13 425 : 807
## 3rd Qu.:686.0 3rd Qu.:2021-10-18 410 : 806
## Max. :901.0 Max. :2021-10-19 424 : 806
## (Other):23545
## mass_g SVL_mm chamber_time_elapsed_hr CEWL_g_m2h
## Min. : 8.70 Min. :60.0 Min. :0.9333 Min. : 0.11
## 1st Qu.:10.00 1st Qu.:64.0 1st Qu.:1.0000 1st Qu.: 7.38
## Median :11.30 Median :66.0 Median :1.0333 Median :13.00
## Mean :11.42 Mean :66.3 Mean :1.0991 Mean :13.43
## 3rd Qu.:12.60 3rd Qu.:69.0 3rd Qu.:1.1667 3rd Qu.:18.29
## Max. :15.30 Max. :75.0 Max. :1.5333 Max. :31.07
##
## msmt_temp_C msmt_RH_percent starting_CEWL relative_CEWL
## Min. :24.10 Min. :21.20 Min. : 4.64 Min. :-18.000
## 1st Qu.:24.80 1st Qu.:32.30 1st Qu.: 9.54 1st Qu.: -5.170
## Median :25.20 Median :33.60 Median :14.17 Median : -2.260
## Mean :25.13 Mean :34.39 Mean :15.76 Mean : -2.332
## 3rd Qu.:25.50 3rd Qu.:36.50 3rd Qu.:21.24 3rd Qu.: 0.420
## Max. :26.00 Max. :48.10 Max. :28.52 Max. : 10.800
##
## time_min
## Min. : 1.117
## 1st Qu.: 4.767
## Median : 8.083
## Mean : 8.120
## 3rd Qu.:11.433
## Max. :15.017
##
## [1] 401 402 404 407 409 410 412 413 415 416 417 418 419 420 421 422 423 424 425
## [20] 427 428 429 430 431 432 433 434 435 437 438 439 406 408 411 414 426 436
## 39 Levels: 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 ... 439
## [1] 37
temp_time_series0 <- CEWL_time_series %>%
# filter out lizards with missing TC data
dplyr::filter(lizard_ID %nin% c(428, 437, 439, 416)) %>%
left_join(TC_corrected_data,
by = c('date', 'time_sec', "lizard_ID")) %>%
dplyr::filter(complete.cases(calibrated_cloacal_temp, calibrated_dorsal_temp))
# get relative temps too
start_temp <- temp_time_series0 %>%
group_by(lizard_ID) %>%
mutate(start_time = min(time_sec)) %>%
dplyr::filter(time_sec == start_time) %>%
dplyr::select(lizard_ID,
starting_clo_temp = calibrated_cloacal_temp,
starting_dors_temp = calibrated_dorsal_temp)
temp_time_series <- temp_time_series0 %>%
left_join(start_temp, by = "lizard_ID") %>%
mutate(relative_clo_temp = calibrated_cloacal_temp - starting_clo_temp,
relative_dors_temp = calibrated_dorsal_temp - starting_dors_temp)
summary(temp_time_series)## time_sec date lizard_ID treatment
## Min. : 67.0 Min. :2021-10-05 407 : 819 Control:8652
## 1st Qu.:284.0 1st Qu.:2021-10-11 404 : 813 Cooling:8743
## Median :483.0 Median :2021-10-12 412 : 809 Heating:7856
## Mean :485.6 Mean :2021-10-13 410 : 806
## 3rd Qu.:684.0 3rd Qu.:2021-10-18 424 : 806
## Max. :900.0 Max. :2021-10-19 425 : 806
## (Other):20392
## mass_g SVL_mm chamber_time_elapsed_hr CEWL_g_m2h
## Min. : 8.80 Min. :60.00 Min. :0.9333 Min. : 0.11
## 1st Qu.:10.30 1st Qu.:64.00 1st Qu.:1.0000 1st Qu.: 6.64
## Median :11.70 Median :66.00 Median :1.0167 Median :11.78
## Mean :11.64 Mean :66.68 Mean :1.1032 Mean :12.71
## 3rd Qu.:12.70 3rd Qu.:69.00 3rd Qu.:1.1667 3rd Qu.:17.39
## Max. :15.30 Max. :75.00 Max. :1.5333 Max. :31.07
##
## msmt_temp_C msmt_RH_percent starting_CEWL relative_CEWL
## Min. :24.10 Min. :21.20 Min. : 4.64 Min. :-18.000
## 1st Qu.:24.80 1st Qu.:30.20 1st Qu.: 9.35 1st Qu.: -5.260
## Median :25.20 Median :33.60 Median :13.56 Median : -2.320
## Mean :25.14 Mean :34.37 Mean :15.22 Mean : -2.505
## 3rd Qu.:25.50 3rd Qu.:36.50 3rd Qu.:21.23 3rd Qu.: 0.380
## Max. :26.00 Max. :48.10 Max. :28.52 Max. : 9.820
##
## time_min time value_cloacal
## Min. : 1.117 Min. :2023-10-23 13:19:18.00 Min. :12.60
## 1st Qu.: 4.733 1st Qu.:2023-10-23 14:17:04.00 1st Qu.:21.20
## Median : 8.050 Median :2023-10-23 15:14:24.00 Median :25.30
## Mean : 8.093 Mean :2023-10-23 15:20:19.41 Mean :24.46
## 3rd Qu.:11.400 3rd Qu.:2023-10-23 16:11:52.50 3rd Qu.:27.20
## Max. :15.000 Max. :2023-10-23 17:48:22.00 Max. :37.50
##
## thermocouple_cloacal value_dorsal thermocouple_dorsal
## E :6163 Min. :14.60 A :5363
## F :4804 1st Qu.:22.40 D :4780
## B :4777 Median :25.20 F :4694
## D :4498 Mean :24.77 G :3201
## A :2013 3rd Qu.:27.10 E :2812
## H :1473 Max. :34.30 B :2208
## (Other):1523 (Other):2193
## date_time calibrated_cloacal_temp
## Min. :2021-10-05 13:19:18.00 Min. :14.11
## 1st Qu.:2021-10-11 14:47:47.50 1st Qu.:22.61
## Median :2021-10-12 15:15:18.00 Median :26.60
## Mean :2021-10-13 15:45:59.15 Mean :25.82
## 3rd Qu.:2021-10-18 15:58:54.50 3rd Qu.:28.51
## Max. :2021-10-19 16:24:08.00 Max. :38.47
##
## calibrated_dorsal_temp min_time
## Min. :16.06 Min. :2021-10-05 13:17:44.00
## 1st Qu.:24.14 1st Qu.:2021-10-11 14:42:50.00
## Median :26.93 Median :2021-10-12 15:07:39.00
## Mean :26.28 Mean :2021-10-13 15:37:48.54
## 3rd Qu.:28.43 3rd Qu.:2021-10-18 15:46:10.00
## Max. :35.66 Max. :2021-10-19 16:09:03.00
##
## start_time starting_clo_temp starting_dors_temp
## Min. :2021-10-05 13:17:49.00 Min. :15.77 Min. :16.06
## 1st Qu.:2021-10-11 14:42:55.00 1st Qu.:19.43 1st Qu.:24.32
## Median :2021-10-12 15:07:44.00 Median :27.37 Median :27.53
## Mean :2021-10-13 15:37:53.54 Mean :26.39 Mean :27.26
## 3rd Qu.:2021-10-18 15:46:15.00 3rd Qu.:33.54 3rd Qu.:33.07
## Max. :2021-10-19 16:09:08.00 Max. :35.32 Max. :35.66
##
## relative_clo_temp relative_dors_temp
## Min. :-20.6115 Min. :-17.5000
## 1st Qu.: -6.9403 1st Qu.: -5.2307
## Median : 0.1974 Median : 0.1978
## Mean : -0.5690 Mean : -0.9806
## 3rd Qu.: 3.7849 3rd Qu.: 1.5610
## Max. : 19.0411 Max. : 16.7141
##
## [1] 401 402 404 407 409 410 412 413 415 417 418 419 420 421 422 423 424 425 427
## [20] 429 430 431 432 433 434 435 438 406 408 411 414 426 436
## 39 Levels: 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 ... 439
# check for lizards that should be in CEWL data but not temp data
unique(CEWL_time_series$lizard_ID)[unique(CEWL_time_series$lizard_ID) %nin% unique(temp_time_series$lizard_ID)]## [1] 416 428 437 439
## 39 Levels: 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 ... 439
## [1] 33
Mean Body Temps
starts <- temp_time_series %>%
group_by(lizard_ID) %>%
dplyr::filter(time_sec == min(time_sec))
starts %>%
group_by(treatment) %>%
summarise(mean(calibrated_cloacal_temp, na.rm = T),
sd(calibrated_cloacal_temp, na.rm = T),
mean(calibrated_dorsal_temp, na.rm = T),
sd(calibrated_dorsal_temp, na.rm = T))## # A tibble: 3 × 5
## treatment `mean(calibrated_cloacal_temp, na.rm = T)` sd(cali…¹ mean(…² sd(ca…³
## <fct> <dbl> <dbl> <dbl> <dbl>
## 1 Control 27.0 1.33 27.2 1.00
## 2 Cooling 33.8 1.06 33.6 1.31
## 3 Heating 17.6 1.51 20.2 2.80
## # … with abbreviated variable names ¹`sd(calibrated_cloacal_temp, na.rm = T)`,
## # ²`mean(calibrated_dorsal_temp, na.rm = T)`,
## # ³`sd(calibrated_dorsal_temp, na.rm = T)`
ends <- temp_time_series %>%
group_by(lizard_ID) %>%
dplyr::filter(time_sec == max(time_sec))
ends %>%
group_by(treatment) %>%
summarise(mean(calibrated_cloacal_temp, na.rm = T),
sd(calibrated_cloacal_temp, na.rm = T),
mean(calibrated_dorsal_temp, na.rm = T),
sd(calibrated_dorsal_temp, na.rm = T))## # A tibble: 3 × 5
## treatment `mean(calibrated_cloacal_temp, na.rm = T)` sd(cali…¹ mean(…² sd(ca…³
## <fct> <dbl> <dbl> <dbl> <dbl>
## 1 Control 28.1 1.25 28.2 1.05
## 2 Cooling 18.1 2.91 20.1 2.73
## 3 Heating 34.3 2.33 31.3 3.11
## # … with abbreviated variable names ¹`sd(calibrated_cloacal_temp, na.rm = T)`,
## # ²`mean(calibrated_dorsal_temp, na.rm = T)`,
## # ³`sd(calibrated_dorsal_temp, na.rm = T)`
CEWL per Body Temp per Tmt
I want to know what the typical CEWL value was at a given temperature, on average for lizards within treatment groups.
# first aggregate all body temps to the nearest degree, by indiv, then by tmt
mean_CEWL_temp <- temp_time_series %>%
# round temp to nearest degree
mutate(rounded_cal_c_temp = round(calibrated_cloacal_temp, 0)) %>%
# avg each individual at each degree
group_by(lizard_ID, treatment, rounded_cal_c_temp) %>%
summarise(mean_CEWL = mean(CEWL_g_m2h)) %>%
# avg each tmt group at each degree
group_by(treatment, rounded_cal_c_temp) %>%
summarise(mean_mean_CEWL = mean(mean_CEWL),
CEWL_sd = sd(mean_CEWL),
CEWL_min = min(mean_CEWL),
CEWL_max = max(mean_CEWL),
n = n()) %>%
mutate(CEWL_sem = CEWL_sd/sqrt(n))## `summarise()` has grouped output by 'lizard_ID', 'treatment'. You can override
## using the `.groups` argument.
## `summarise()` has grouped output by 'treatment'. You can override using the
## `.groups` argument.
# save to compare to ded lizzies
write_rds(mean_CEWL_temp, "./Data/CEWL_by_temp_tmt.RDS")
# get values for heating group only
heat_CEWL <- mean_CEWL_temp %>%
dplyr::filter(treatment == "Heating") %>%
dplyr::select(treatment, rounded_cal_c_temp, heating_mm_CEWL = mean_mean_CEWL)
#write_rds(heat_CEWL, "./Data/CEWL_by_temp_heating_tmt.RDS")
# get values for cooling group only
cool_CEWL <- mean_CEWL_temp %>%
dplyr::filter(treatment == "Cooling")%>%
dplyr::select(treatment, rounded_cal_c_temp, cooling_mm_CEWL = mean_mean_CEWL)
#write_rds(cool_CEWL, "./Data/CEWL_by_temp_cooling_tmt.RDS")
# calculate the diff between groups at a common temperature
compare_CEWL <- heat_CEWL %>%
left_join(cool_CEWL, by = 'rounded_cal_c_temp') %>%
mutate(diff = heating_mm_CEWL - cooling_mm_CEWL) %>%
dplyr::filter(complete.cases(diff))
compare_CEWL## # A tibble: 20 × 6
## treatment.x rounded_cal_c_temp heating_mm_CEWL treatment.y cooling_mm…¹ diff
## <fct> <dbl> <dbl> <fct> <dbl> <dbl>
## 1 Heating 16 16.9 Cooling 4.59 12.3
## 2 Heating 17 11.0 Cooling 4.71 6.26
## 3 Heating 18 12.6 Cooling 4.83 7.78
## 4 Heating 19 14.9 Cooling 5.42 9.48
## 5 Heating 20 15.2 Cooling 5.47 9.71
## 6 Heating 21 16.5 Cooling 5.14 11.4
## 7 Heating 22 17.0 Cooling 5.30 11.7
## 8 Heating 23 17.2 Cooling 5.28 12.0
## 9 Heating 24 17.3 Cooling 5.32 12.0
## 10 Heating 25 17.4 Cooling 5.82 11.6
## 11 Heating 26 17.7 Cooling 5.96 11.7
## 12 Heating 27 18.0 Cooling 6.25 11.7
## 13 Heating 28 18.0 Cooling 6.66 11.4
## 14 Heating 29 18.0 Cooling 7.26 10.7
## 15 Heating 30 18.6 Cooling 8.09 10.5
## 16 Heating 31 18.8 Cooling 9.00 9.84
## 17 Heating 32 19.2 Cooling 10.2 9.06
## 18 Heating 33 19.6 Cooling 10.6 8.94
## 19 Heating 34 19.6 Cooling 12.1 7.52
## 20 Heating 35 21.5 Cooling 11.7 9.81
## # … with abbreviated variable name ¹cooling_mm_CEWL
# summary stats
compare_CEWL %>%
summarise(mean_CEWL_diff = mean(diff),
sd_CEWL_diff = sd(diff),
max_CEWL_diff = max(diff),
min_CEWL_diff = min(diff)) %>%
mutate(sem_CEWL_diff = sd_CEWL_diff/sqrt(nrow(compare_CEWL)))## # A tibble: 1 × 5
## mean_CEWL_diff sd_CEWL_diff max_CEWL_diff min_CEWL_diff sem_CEWL_diff
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 10.3 1.70 12.3 6.26 0.379
VPD at CEWL Measurement
We can calculate the VPD from the RH and temp recorded by the evaporimeter during the measurements we took.
VPD_at_CEWL <- CEWL_time_series %>%
# calculate VPD for each temp/RH recording
mutate(e_s_kPa = 0.611 * exp((17.502*msmt_temp_C)/(msmt_temp_C + 240.97)),
VPD_kPa = e_s_kPa*(1 - (msmt_RH_percent/100)),
date = as.factor(date)) %>%
# we want mean VPD experienced for each lizard
group_by(lizard_ID, date, treatment) %>%
summarise(VPD_at_msmt = mean(VPD_kPa))## `summarise()` has grouped output by 'lizard_ID', 'date'. You can override using
## the `.groups` argument.
## lizard_ID date treatment VPD_at_msmt
## 401 : 1 2021-10-05:6 Control:12 Min. :1.690
## 402 : 1 2021-10-11:8 Cooling:12 1st Qu.:1.945
## 404 : 1 2021-10-12:7 Heating:13 Median :2.148
## 406 : 1 2021-10-18:8 Mean :2.092
## 407 : 1 2021-10-19:8 3rd Qu.:2.180
## 408 : 1 Max. :2.423
## (Other):31
# now calculate mean sd, and range for dates and treatments
VPD_at_CEWL %>%
group_by(date) %>%
summarise(VPD_mean = mean(VPD_at_msmt),
VPD_sd = sd(VPD_at_msmt),
VPD_min = min(VPD_at_msmt),
VPD_max = max(VPD_at_msmt))## # A tibble: 5 × 5
## date VPD_mean VPD_sd VPD_min VPD_max
## <fct> <dbl> <dbl> <dbl> <dbl>
## 1 2021-10-05 1.72 0.0246 1.69 1.74
## 2 2021-10-11 2.18 0.0398 2.14 2.28
## 3 2021-10-12 2.37 0.0597 2.27 2.42
## 4 2021-10-18 2.15 0.0252 2.11 2.18
## 5 2021-10-19 1.98 0.0760 1.90 2.10
VPD_at_CEWL %>%
group_by(treatment) %>%
summarise(VPD_mean = mean(VPD_at_msmt),
VPD_sd = sd(VPD_at_msmt),
VPD_min = min(VPD_at_msmt),
VPD_max = max(VPD_at_msmt))## # A tibble: 3 × 5
## treatment VPD_mean VPD_sd VPD_min VPD_max
## <fct> <dbl> <dbl> <dbl> <dbl>
## 1 Control 2.08 0.214 1.70 2.37
## 2 Cooling 2.11 0.198 1.73 2.41
## 3 Heating 2.08 0.246 1.69 2.42
## [1] 2.091736
## [1] 0.2152626
## [1] 1.689619
## [1] 2.422628
Export RDS files
We export the wrangled dataframes as RDS files so they save the R formatting we created thoughout this file. If we saved them as csv’s, we would have to reformat them every time we read them into a new Rmd.